import numpy as np
import pandas as pd
# sklearn
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_moons, make_circles, make_classification
# Keras: model definition, layers, optimizers, and utilities.
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from keras.optimizers import SGD, Adagrad
from keras.utils.vis_utils import plot_model
from keras.utils import to_categorical
# Visualisation libraries
## matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
## seaborn
import seaborn as sns
# Paper-style seaborn theme with slightly enlarged titles and labels.
sns.set_context("paper", rc={"font.size":12,"axes.titlesize":14,"axes.labelsize":12})
sns.set_style("white")
## plotly
from plotly.offline import init_notebook_mode, iplot
import plotly.graph_objs as go
import plotly.offline as py
from plotly.subplots import make_subplots
import plotly.express as px
# Sharper inline figures on high-DPI ("retina") displays.
%config InlineBackend.figure_format = 'retina'
import warnings
# NOTE(review): this silences ALL warnings, including deprecation notices;
# consider narrowing the filter in production notebooks.
warnings.filterwarnings("ignore")
Machine learning methods can be categorized as supervised, semi-supervised, or unsupervised. Deep learning can be supervised or unsupervised, and it is based on artificial neural networks (ANNs) with representation learning. In this article, we demonstrate how to implement a deep neural network (DNN) in Keras.
Deep neural networks (DNNs) — of which convolutional networks are one well-known family — are comprised of several layers of nonlinear operations. The goal of deep learning methods is to learn feature hierarchies: higher-level features are formed by combining lower-level features [1].
A random n-class classification dataset can be generated using sklearn.datasets.make_classification. Here, we generate a dataset with two features and 6000 instances. Moreover, the dataset is generated for multiclass classification with four classes.
# Synthetic multi-class dataset: 6000 samples, 2 informative features,
# 4 classes with one cluster each; seeded for reproducibility.
X, y = make_classification(n_samples = int(6e3), n_features=2, n_redundant=0, n_classes = 4,
                           n_informative=2, random_state=1, n_clusters_per_class=1)
# String class labels, used later for legends and axis tick labels.
Labels = [str(label) for label in np.unique(y)]
An efficient way to split a dataset into random train and test subsets is sklearn.model_selection.train_test_split.
# Hold out 30% of the data for testing; one-hot encode the targets so they
# match the softmax output / categorical cross-entropy loss used below.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
n_classes = len(Labels)
y_train = to_categorical(y_train, num_classes=n_classes)
y_test = to_categorical(y_test, num_classes=n_classes)
# Summarize the shape of every split in a small table (displayed inline).
shape_table = pd.DataFrame(
    data={'Set': ['X_train', 'X_test', 'y_train', 'y_test'],
          'Shape': [X_train.shape, X_test.shape, y_train.shape, y_test.shape]})
shape_table.set_index('Set').T

A multi-layer perceptron (MLP) is a class of feedforward artificial neural network (ANN). scikit-learn.org has a well-written article regarding MLPs, and interested readers are encouraged to see it.
In this article, we present a multi-class MLP using Keras and focus on implementing this method in Keras. We define our model using the Sequential class, and we use the rectified linear unit (ReLU) as the activation function. An activation function allows complex relationships in the data to be learned. For the last layer, we use the softmax function, also known as softargmax or the normalized exponential function.
# MLP: two hidden ReLU layers of 64 units, each followed by 50% dropout,
# and a softmax output with one unit per class.
model = Sequential(name = 'Multi-Class MLP')
for layer in (
        Dense(64, input_dim=X.shape[1], activation='relu', name='Layer1'),
        Dropout(0.5),
        Dense(64, activation='relu', name='Layer2'),
        Dropout(0.5),
        Dense(len(Labels), activation='softmax', name='Layer3')):
    model.add(layer)
model.summary()
# Render the architecture diagram (requires pydot/graphviz).
plot_model(model, show_shapes=True, show_layer_names=True, expand_nested = True)
# Number of training epochs (each row of the history below is one epoch).
IT = int(1e3)+1
# FIX: `lr` is the deprecated alias of this argument; Keras >= 2.3 spells it
# `learning_rate`.
model.compile(optimizer= SGD(learning_rate=0.01, decay=1e-6, momentum=0.9, nesterov=True),
              loss='categorical_crossentropy',
              metrics=['accuracy','mae', 'mse'])
# Train model silently (verbose=0); per-epoch metrics are kept in `history`.
history = model.fit(X_train, y_train, epochs= IT, batch_size=128, verbose = 0)
# Predictions and score on the held-out test set.
y_pred = model.predict(X_test)
score = model.evaluate(X_test, y_test, batch_size=128)
score = pd.DataFrame(score, index = model.metrics_names).T
# Flatten the Keras History object into a tidy DataFrame, one row per epoch.
history = pd.DataFrame(history.history)
history.reset_index(drop = False, inplace = True)
history.columns =['Iteration', 'Loss', 'Accuracy', 'MAE', 'MSE']
# FIX: Styler.hide_index() and Styler.set_precision() were deprecated in
# pandas 1.3/1.4 and removed in 2.0; use hide(axis=...)/format(precision=...).
display(score.style.hide(axis='index').format(precision=4))
Let's define some functions with which we can analyze the model's performance.
def Plot_history(history, Table_Rows = 25):
    """Show training metrics as line plots (left) and a sampled table (right).

    `history` is the tidy per-epoch DataFrame with columns
    Iteration/Loss/Accuracy/MAE/MSE built after training.
    `Table_Rows` controls how many evenly spaced epochs appear in the table.
    """
    fig = make_subplots(rows=1, cols=2, horizontal_spacing = 0.02,
                        column_widths=[0.6, 0.4],
                        specs=[[{"type": "scatter"},{"type": "table"}]])
    # Left panel: one trace per metric, plotted against the epoch number.
    metric_lines = (
        ('Loss', 'OrangeRed', 'Loss'),
        ('Accuracy', 'MidnightBlue', 'Accuracy'),
        ('MAE', 'ForestGreen', 'Mean Absolute Error (MAE)'),
        ('MSE', 'purple', 'Mean Squared Error (MSE)'),
    )
    for column, color, label in metric_lines:
        fig.add_trace(go.Scatter(x= history['Iteration'].values,
                                 y= history[column].values,
                                 line=dict(color=color, width= 1.5),
                                 name = label), 1, 1)
    fig.update_layout(legend=dict(x=0, y=1.1, traceorder='reversed', font_size=12),
                      dragmode='select', plot_bgcolor= 'white', height=600,
                      hovermode='closest', legend_orientation='h')
    fig.update_xaxes(range=[history.Iteration.min(), history.Iteration.max()],
                     showgrid=True, gridwidth=1, gridcolor='Lightgray',
                     showline=True, linewidth=1, linecolor='Lightgray',
                     mirror=True, row=1, col=1)
    fig.update_yaxes(range=[0, 1], showgrid=True, gridwidth=1, gridcolor='Lightgray',
                     showline=True, linewidth=1, linecolor='Lightgray',
                     mirror=True, row=1, col=1)
    # Right panel: keep Table_Rows evenly spaced epochs plus the final one.
    # (Iteration values coincide with the row index after reset_index, so the
    # last Iteration value is a valid index entry here.)
    ind = np.linspace(0, history.shape[0], Table_Rows,
                      endpoint = False).round(0).astype(int)
    ind = np.append(ind, history.Iteration.values[-1])
    history = history[history.index.isin(ind)]
    fig.add_trace(go.Table(
        header=dict(values = list(history.columns), line_color='darkslategray',
                    fill_color='DimGray', align=['center','center'],
                    font=dict(color='white', size=12), height=25),
        columnwidth = [0.4, 0.4, 0.4, 0.4],
        cells=dict(values=[history.Iteration, history.Loss.round(4),
                           history.Accuracy.round(4), history.MAE.round(4),
                           history.MSE.round(4)],
                   line_color='darkslategray',
                   fill=dict(color=['WhiteSmoke', 'white']),
                   align=['center', 'center'], font_size=12, height=20)), 1, 2)
    fig.show()
def Confusion_Matrix(Model, X, y, Labels, FG = (14, 5)):
    """Plot raw (left) and row-normalized (right) confusion matrices.

    `y` may be one-hot encoded (class index taken via argmax) or a single
    column of binary labels (predictions rounded). Returns (fig, axes).
    """
    fig, axes = plt.subplots(1, 2, figsize=FG)
    y_pred = Model.predict(X)
    # One-hot targets -> compare class indices; otherwise round probabilities.
    if y.shape[1] > 1:
        CM = confusion_matrix(y.argmax(axis = 1), y_pred.argmax(axis = 1))
    else:
        CM = confusion_matrix(y, np.round(y_pred))
    _ = sns.heatmap(CM.round(2), annot=True, annot_kws={"size": 14},
                    cmap="Blues", ax = axes[0])
    # Divide each row by its total so cells show per-class proportions.
    CM = CM.astype('float') / CM.sum(axis=1)[:, np.newaxis]
    _ = sns.heatmap(CM.round(2), annot=True, annot_kws={"size": 14},
                    cmap="Greens", ax = axes[1], linewidths = 0.2,
                    vmin=0, vmax=1, cbar_kws={"shrink": 1})
    titles = ('Confusion Matrix', 'Normalized Confusion Matrix')
    for axis, title in zip(axes, titles):
        _ = axis.set_xlabel('Predicted labels')
        _ = axis.set_ylabel('True labels')
        _ = axis.set_title(title)
        _ = axis.xaxis.set_ticklabels(Labels)
        _ = axis.yaxis.set_ticklabels(Labels)
    return fig, axes
def Plot_Classification(Model, X, y, Labels, BP = .5, Alpha=0.6, ax = False, fs = 7, ColorMap = 'Spectral'):
    """Shade the model's decision regions and overlay the data points.

    Parameters
    ----------
    Model : fitted estimator with a ``predict`` method.
    X : array of shape (n_samples, 2); only two features are plotted.
    y : targets — one-hot encoded (argmax taken) or a single binary column
        (predictions rounded).
    Labels : class label strings for the legend.
    BP : legend border padding; Alpha : scatter-point transparency.
    ax : existing matplotlib Axes to draw on, or False to create a new
         square figure of side ``fs`` inches.
    ColorMap : matplotlib colormap name for both regions and points.
    """
    h = 0.02    # mesh step for the decision-region grid
    pad = 0.25  # margin added around the data range
    x_min, x_max = X[:, 0].min()-pad, X[:, 0].max()+pad
    y_min, y_max = X[:, 1].min()-pad, X[:, 1].max()+pad
    # Dense grid covering the padded data range.
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
    # Predict a class for every grid point to color the decision regions.
    Pred = Model.predict(np.c_[xx.ravel(), yy.ravel()])
    if y.shape[1] > 1:
        Pred = Pred.argmax(axis = 1)
    Pred = Pred.reshape(xx.shape)
    # FIX: `ax == False` compared a (possibly non-boolean) object to False
    # with `==`, which is fragile; the identity check is the correct test
    # for the False sentinel default.
    if ax is False:
        fig, ax = plt.subplots(1, 1, figsize=(fs, fs))
    _ = ax.contourf(xx, yy, Pred, cmap = ColorMap, alpha=0.2)
    # Reduce targets to integer class labels for coloring the scatter.
    if y.shape[1] > 1:
        y = y.argmax(axis = 1)
    else:
        y = np.round(y).T[0]
    scatter = ax.scatter(X[:,0], X[:,1], s=70, c=y, edgecolor = 'Navy',
                         alpha = Alpha, cmap = ColorMap)
    _ = ax.legend(handles=scatter.legend_elements()[0], labels= Labels,
                  fancybox=True, framealpha=1, shadow=True, borderpad=BP,
                  loc='best', fontsize = 14)
    _ = ax.set_xlim(x_min, x_max)
    _ = ax.set_ylim(y_min, y_max)
    _ = ax.set_xlabel(r'$X_1$')
    _ = ax.set_ylabel(r'$X_2$')
Plot_history(history)
The confusion matrix allows for visualization of the performance of an algorithm.
# Confusion matrices for the train and test sets.
fig, _ = Confusion_Matrix(model, X_train, y_train, Labels)
_ = fig.suptitle('Train Set', fontsize = 16)
fig, _ = Confusion_Matrix(model, X_test, y_test, Labels)
_ = fig.suptitle('Test Set', fontsize = 16)
# Decision regions side by side for the train and test sets.
fig, ax = plt.subplots(1, 2, figsize=(16, 7))
# Train Set
Plot_Classification(model, X_train, y_train, Labels, BP = .2, ax = ax[0])
_ = ax[0].set_title('Train Set', fontsize = 16)
# Test Set
Plot_Classification(model, X_test, y_test, Labels, BP = .2, ax = ax[1])
_ = ax[1].set_title('Test Set', fontsize = 16)